sce <-readRDS(file.path("..", "outs", "01-sce.rds"))#sce_sub <- sce[,!sce$TissueSub %in% "LN"]sce_sub <- scesce_sub$Origin <- sce_sub$TissueSub |> forcats::fct_collapse("parenchymal"=c("Alveoles", "Kidney", "LSCC", "RCC"))sce_sub$Origin <- sce_sub$Origin |>droplevels()sce_sub$Origin <-relevel(sce_sub$Origin, ref="parenchymal")# To compare TLS in generalsce_sub$Origin_gen <- sce_sub$Origin |> forcats::fct_collapse("TLS"=c("E_TLS","SFL_TLS","PFL_TLS","Tcell_TLS"))sce_sub$Origin_gen <- sce_sub$Origin_gen |>droplevels()sce_sub$Origin_gen <-relevel(sce_sub$Origin_gen, ref="parenchymal")# Set up nested design matrix# Add a new column mixing patient and group variablesce_sub$PatTum <-paste0(sce_sub$PatientID, "_", sce_sub$TumorType)sce_sub$PatTum <- sce_sub$PatTum |> forcats::fct_collapse("1"=c("16690_LSCC", "r15660_RCC"),"2"=c("17776_LSCC", "r18773_RCC"),"3"=c("24136_LSCC", "r19101_RCC"),"4"=c("32288_LSCC", "r24784_RCC"),"5"=c("38234_LSCC", "r30616_RCC"),"6"=c("39160_LSCC", "r8527_RCC"))df <-data.frame(colData(sce_sub))
Multilevel design Model
We have an experiment with repeated measures (multiple measurements per Patient) and different sample locations (TLS type, parenchymal) as well as different tumor types (LSCC,RCC). To address this we fit different model:
a model to find tumor specific differences between sample origins (TLS/parenchyma etc) while accounting for patientID: ~ TumorType + PatTum:TumorType + TumorType:Origin.
a model to compare sample origins between tumor types not accounting for PatientID (see here): ~ TumorType:Origin.
Model to identify TLS-specific differences between tumor types (RCC vs LSCC). As patient ID and group are convoluted the patient information is skipped in this model. Instead the model considers an interaction term of the sample origin (TLS stage/parenchyma) and the tumor type.
To select TLS-specific DE genes specific to each tumor type we use results from model2. Results are then filtered for genes that are also significant in the parenchymal comparison of the tumor (excluded), if the genes were not shown to be specific to TLS maturation by showing significant differences in model1 comparisons (both_TLS). Genes only significant in the non parenchymal comparison of model2 (sig_TLS) and those significant in the TLS-spcific and parenchymal comparison of the tumor showing an opposite effect (opposite) were selected.
Code
sel_TLS <-function(res, pat, fdr){ pat_nam <-grep(pat, names(res), value = T) pat_nam <-grep("par", pat_nam, value = T) sub_res <-lapply(pat_nam, function(nam){ res_sel <- res[[nam]] |>filter(logFC >0& FDR < fdr) res_sel$gene <-rownames(res_sel)data.frame(res_sel) }) |>bind_rows(.id ="comp")}TLS_RCC <-sel_TLS(res_mod1, "RCC", fdr =0.05)TLS_RCC_gene <-unique(TLS_RCC$gene)write.csv(TLS_RCC_gene, paste0("../outs/TLS_RCC_genes.csv"))TLS_LSCC <-sel_TLS(res_mod1, "LSCC", fdr =0.05)TLS_LSCC_gene <-unique(TLS_LSCC$gene)write.csv(TLS_LSCC_gene, paste0("../outs/TLS_LSCC_genes.csv"))# General exclusion list# Genes sig DE in parenchymal comparison, but not sig in tumor-specific TLS maturationpar_res <- res_mod2[["par_inRCC_vs_LSCC"]] |>filter(FDR <0.05) |>data.frame()par_res$gene <-rownames(par_res)exclude <- par_res[!par_res$gene %in%c(TLS_RCC_gene, TLS_LSCC_gene),]write.csv(exclude, paste0("../outs/excluded_parenchymal_genes.csv"))sel_genes <-function(comp1, comp2, FDR_t =0.1){ log_FC_sub <- log_FC_tab |> dplyr::filter(contrast %in%c(comp1, comp2)) |>select(c(logFC,FDR,gene, F,contrast)) |>pivot_wider(names_from = contrast, values_from =c(logFC, FDR, F))# new colum names logFC_comp1 <-grep(paste0("logFC_", comp1), colnames(log_FC_sub), value = T) logFC_comp2 <-grep(paste0("logFC_", comp2), colnames(log_FC_sub), value = T) FDR_comp1 <-grep(paste0("FDR_", comp1), colnames(log_FC_sub), value = T) FDR_comp2 <-grep(paste0("FDR_", comp2), colnames(log_FC_sub), value = T)# Add sig threshold log_FC_sub <- log_FC_sub |>mutate(sig =case_sig(FDR1 =!!sym(FDR_comp1), FDR2 =!!sym(FDR_comp2), com1 = comp1, com2 = comp2, FDR_t = FDR_t))# Genes to exclude log_FC_sub <- log_FC_sub |>mutate(selected =case_when(# exclude all sig in both & same sign & not in TLS list sig %in%c("both") &sign(!!sym(logFC_comp1)) ==sign(!!sym(logFC_comp2)) &!gene %in%c(TLS_LSCC_gene, TLS_RCC_gene) ~"exclude",# include sig in both & same sign & in TLS list sig %in%c("both") &sign(!!sym(logFC_comp1)) ==sign(!!sym(logFC_comp2)) & gene %in%c(TLS_LSCC_gene, TLS_RCC_gene) ~"both_TLS",# sig all sig in TLS only sig %in% comp1 ~"sig_TLS", sig %in%c("both") &sign(!!sym(logFC_comp1)) !=sign(!!sym(logFC_comp2)) ~"opposite",.default ="none" ) ) par_nam <-grep("_par_", colnames(log_FC_sub)) log_FC_sub <- log_FC_sub |>select(-par_nam) }